/*************************************************************************************
*  This is a SAS error checking program.
*  We suggest that you run this program even if you use Excel. This program
*    will help you catch errors like "E2 " where the space after E2 creates a new
*    behavior designation. Make sure that you do not mix e2 and E2.
*  WARNING: close your data file in Excel before continuing.
*  This program helps you do several kinds of error checking.
*  First) Are any behaviors repeated? If so, the main program will not run properly
*  Second) Are any durations negative?
*  Third) Are there any illogical transitions? Can Z go directly to E2?
*  WARNING) Capitalization in waveform event descriptors is IMPORTANT. E1 is not the 
*       same as e1.
*************************************************************************************/
/* PFreq1: one-way frequency table of one variable in data set Five5.
   var1 - name of the variable to tabulate. */
%macro PFreq1(var1);
	proc freq data=Five5;
		table &var1;
		title 'Frequency Table of Waveform Event Transitions';
	run;	/* explicit step boundary: the table no longer waits for a later PROC/DATA/RUN statement */
%mend PFreq1;

/* PFreq2: one-way frequency table of one variable in data set one.
   var1 - name of the variable to tabulate. */
%macro PFreq2(var1);
	proc freq data=one;
		table &var1;
		title 'Frequency Table of Waveform Event Transitions';
	run;	/* explicit step boundary: the table no longer waits for a later PROC/DATA/RUN statement */
%mend PFreq2;

/* p: print macro. Lists every record of data set one whose marker1 flag equals 1. */
%macro p;
	proc print data=one(where=(marker1 = 1));
	run;
%mend p;

/* doit: decides whether the print macro p should be invoked.
   Reads data set two (written by the PROC MEANS step) and calls p for each
   observation whose variable p equals 1. */
%macro doit;
	/* keep the working macro variables out of the caller's symbol table */
	%local id NObs rc i;

	/* open data set two; open() returns 0 when the data set cannot be opened */
	%let id=%sysfunc(open(two));
	%if &id = 0 %then %do;
		%put ERROR: macro doit could not open data set two.;
		%return;
	%end;

	/* number of observations in the data set; it might be none, or many */
	%let NObs=%sysfunc(attrn(&id,NOBS));

	/* link data set variables to macro variables of the same name (no ampersand on id here) */
	%syscall set(id);

	%do i=1 %to &NObs;
		/* fetch observation i; this refreshes the linked macro variable p */
		%let rc=%sysfunc(fetchobs(&id,&i));
		%if &p = 1 %then %p;
	%end;

	%let rc=%sysfunc(close(&id));
%mend doit;

/* doit2: decides whether the frequency-table macros should be invoked.
   Reads data set two (written by the PROC MEANS step) and, for each observation
   whose variable p equals 0, calls PFreq1 on waveform and PFreq2 on trans1. */
%macro doit2;
	/* keep the working macro variables out of the caller's symbol table */
	%local id NObs rc i;

	/* open data set two; open() returns 0 when the data set cannot be opened */
	%let id=%sysfunc(open(two));
	%if &id = 0 %then %do;
		%put ERROR: macro doit2 could not open data set two.;
		%return;
	%end;

	/* number of observations in the data set; it might be none, or many */
	%let NObs=%sysfunc(attrn(&id,NOBS));

	/* link data set variables to macro variables of the same name (no ampersand on id here) */
	%syscall set(id);

	%do i=1 %to &NObs;
		/* fetch observation i; this refreshes the linked macro variable p */
		%let rc=%sysfunc(fetchobs(&id,&i));
		%if &p = 0 %then
			%do;
				/* p equals 0: produce the two frequency tables */
				%PFreq1(waveform);
				%PFreq2(trans1);
			%end;
	%end;

	%let rc=%sysfunc(close(&id));
%mend doit2;


/* Listing layout: 100 characters per line, 72 lines per page. */
options linesize=100 pagesize=72;

/* Read the raw comma-delimited file into data set one.
   firstobs=2 starts reading at the second line of the file (presumably the
   first line is a header row - confirm against the file).
   Variables: insectno (character, 20), waveform (character, 10), dur (numeric). */
data one;
	infile 'C:\Users\tebert\Documents\Elaine EPG Work\CUTSTEM\Raw Data\RawDataCutBad.csv'
		dlm=',' dsd missover firstobs=2 end=last;
	length insectno $ 20 waveform $ 10 dur 8;
	/* types come from the LENGTH statement above, so no $ modifiers are needed here */
	input insectno waveform dur;
run;
* We need to find if there are any repeated behaviors. This only works on waveform designations
	with three or fewer characters. A novel waveform Z1E will be a duplicate of Z1E2. If there
	are waveforms with more characters, adjust the program accordingly.;

/* Normalize the waveform labels in a single pass: remove blanks and convert
   to upper case. */
/* Disabled filter kept from the original: if substr(insectno,1,1)="e" then output */
data one;
	set one;
	waveform = upcase(compress(waveform));
run;

/* Flag problem records in data set one.
   marker1 = 1 when a record repeats the previous waveform of the same insect
               or when its duration is not greater than zero (missing included);
   marker1 = 0 otherwise.
   Only the first three characters of waveform are compared. */
Data one; set one;
	/* Declare the helper variables as character BEFORE their first use.
	   Without this, in0 is first seen inside a comparison and SAS creates it
	   as numeric, so non-numeric insect labels are converted to missing and
	   the start of a new insect is never detected.
	   in0 is given the same length as insectno (20). */
	length w0 w1 $ 3 in0 $ 20;
	retain w0 in0;

	w1=substr(waveform,1,3);

	/* in0 is the previous value of insectno. If this changes you have a new insect,
	   so forget the previous waveform. */
	if insectno ne in0 then do;
		w0=' ';
		in0=insectno;
	end;

	/* Test if two consecutive behaviors are the same within one insect,
	   and test if the duration is negative, zero or missing. */
	if w1 ne w0 and dur>0 then marker1=0;
	else marker1=1;

	/* Always remember the current waveform, even when the record was flagged.
	   Otherwise the next record is compared with an older record: a repeat that
	   follows a bad-duration record is missed, and a valid record that follows
	   it can be flagged by mistake. */
	w0=w1;
run;
/************************************************************************************
	Note: In this version dataset "two" holds one observation whose variable p is
	   the maximum of marker1: 0 if there were no problems, 1 if there was at least one.
	   SAS will not generate any output at this point if p is 0.
	   If there were problems, each instance will be printed.

*************************************************************************************/;

/* Build data set two with variable p = maximum of marker1 over data set one;
   p is 1 if at least one record was flagged. No printed output is produced. */
proc summary data=one;
	var marker1;
	output out=two max=p;
run;

/* Invoke the macro that calls proc print when p = 1. */
%doit;
*******************************************************************************
* We now look at transitions as a final error check.
* This is for error checking, not data analysis.
* First count the number of instances of each waveform. Is this list correct?
 ******************************************************************************;
/*********************************************************************************
*Now check the transitions;
*To make this work we need to first drop several variables from the data set and then
    reinitialize them.
*ZZZ is a marker for the starting behavior for each insect. It should equal the
    number of insects in the data file.
*NOTE: there should never be a behavior with the designation ZZZ.
***********************************************************************************/
/* Keep a copy of the checked data (used for the waveform frequency table). */
Data Five5; Set one;
run;

/* Rebuild the helper variables and derive trans1, the transition label:
     'ZZZ'        for the first record of each insect,
     'old to new' when the waveform differs from the previous record,
     blank        when the waveform equals the previous record.
   Only the first three characters of waveform are compared. */
Data one; set one(drop=w0 w1 in0);
	/* Declare the helper variables as character BEFORE their first use.
	   Without this, in0 is first seen inside a comparison and SAS creates it
	   as numeric, so non-numeric insect labels are converted to missing and
	   the start of a new insect is never detected.
	   in0 is given the same length as insectno (20); trans1 keeps the
	   200-character length it previously received from catx. */
	length w0 w1 $ 3 in0 $ 20 trans1 $ 200;
	retain w0 in0;

	w1=substr(waveform,1,3);

	/* New insect: load the sentinel 'XYY' so the first record is labelled 'ZZZ'. */
	if insectno ne in0 then do;
		w0='XYY';
		in0=insectno;
	end;

	if w1 ne w0 then do;
		if w0 ne 'XYY' then trans1=catx(' to ', of w0 w1);
		else trans1='ZZZ';
	end;

	/* remember the current waveform for the next record */
	w0=w1;
run;
/* Snapshot of every record, including the 'ZZZ' start-of-insect rows. */
data three;
	set one;
run;

/* Remove the 'ZZZ' start-of-insect rows before the transition table is built. */
data one;
	set one;
	if trans1 ne "ZZZ";
run;

/* Produce the frequency tables when no problems were flagged (p = 0). */
%doit2;
run;


/* Duration statistics (n, min, max, mean, median) for every
   insect-by-waveform combination. */
title 'Duration by waveform Output';

/* Four is a copy of three sorted by insect and waveform. */
proc sort data=three out=Four;
	by insectno waveform;
run;

proc means data=Four n min max mean median;
	var dur;
	by insectno waveform;
run;
